import mlflow
import pandas as pd
import mlflow
import pandas as pd
def generate_recommendations_table(experiment_ids, prefix_note="sizes_acts", group_type="sim"):
    """Collect MLflow run metrics into one table keyed by run configuration.

    Every run of every experiment in ``experiment_ids`` is inspected. A run is
    kept when its ``note`` param contains ``prefix_note`` (substring match)
    and its ``group_type`` param equals ``group_type``.

    Args:
        experiment_ids: Iterable of MLflow experiment IDs to search.
        prefix_note: Substring that must occur in the run's ``note`` param.
        group_type: Required value of the run's ``group_type`` param.

    Returns:
        A ``pandas.DataFrame`` with the key columns ``Dimensions``, ``TopK``,
        ``Aggregation`` and ``Activation`` followed by ``(dataset, metric)``
        columns, sorted by the four key columns. When no run matches, an
        empty frame holding only the four key columns is returned.
    """
    key_names = ["Dimensions", "TopK", "Aggregation", "Activation"]

    # One entry per configuration key. Runs sharing a key (typically the same
    # configuration on different datasets) are merged into a single row; a
    # later run with the same key AND dataset overwrites the earlier values.
    records = {}
    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list",
        )
        for run in runs:
            params = run.data.params
            run_metrics = run.data.metrics

            # A run logged without a "note" param is treated as having an
            # empty note instead of raising TypeError on `in None`.
            note = params.get("note") or ""
            if prefix_note not in note or params.get("group_type") != group_type:
                continue

            dataset = params.get("dataset", f"Exp-{exp_id}")
            row_key = (
                int(params.get("embedding_dim", 0)),
                int(params.get("top_k", 0)),
                params.get("SAE_fusion_strategy", "none"),
                params.get("topk_inference", "False"),
            )
            # NOTE(review): the "G/mean" column is filled from the
            # "CommonItemsNDCG20/median" metric -- confirm the label is intended.
            records.setdefault(row_key, {}).update({
                (dataset, "G/mean"): run_metrics.get("CommonItemsNDCG20/median"),
                (dataset, "U/mean"): run_metrics.get("NDCG20/mean"),
                (dataset, "U/min"): run_metrics.get("NDCG20/min"),
                (dataset, "Pop"): run_metrics.get("Popularity/mean"),
            })

    if not records:
        # Naming four index levels on an empty frame raises ValueError, so
        # hand back an empty table with the expected key columns instead.
        return pd.DataFrame(columns=key_names)

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = key_names
    # Order the columns by dataset then metric, and the rows by configuration.
    df = df.sort_index(axis=1, level=[0, 1]).sort_values(by=key_names)
    return df.reset_index()
def highlight_top3_dark_to_light(s):
    """Return per-cell CSS that shades the largest distinct values of ``s``.

    Meant to be passed to ``Styler.apply``, which calls it with one Series.
    The largest distinct value gets the first (darkest) colour, the next
    distinct value the second colour; every other cell gets an empty style.
    Cells holding equal values share a colour.

    NOTE(review): despite "top3" in the name, only two colours are defined,
    so only the two largest distinct values are highlighted.

    Args:
        s: Numeric ``pandas.Series`` to style.

    Returns:
        A list of CSS strings, one per element of ``s``, in order.
    """
    # Colors from dark to light
    colors = ['mediumseagreen', 'lightgreen']
    # Rank DISTINCT values: deduplicating first keeps tied maxima from using
    # up rank slots, and the count follows the palette so the two cannot
    # drift apart.
    top_values = s.drop_duplicates().nlargest(len(colors)).tolist()
    color_for = dict(zip(top_values, colors))
    return [
        f'background-color: {color_for[v]}' if v in color_for else ''
        for v in s
    ]
def highlight_bottom3_dark_to_light(s):
    """Return per-cell CSS that shades the smallest distinct values of ``s``.

    Meant to be passed to ``Styler.apply``, which calls it with one Series.
    The smallest distinct value gets the first (darkest) colour, the next
    two distinct values the following colours; every other cell gets an
    empty style. Cells holding equal values share a colour.

    Args:
        s: Numeric ``pandas.Series`` to style.

    Returns:
        A list of CSS strings, one per element of ``s``, in order.
    """
    # Colors from dark to light
    colors = ['mediumblue', 'lightblue', 'paleturquoise']
    # Rank DISTINCT values: deduplicating first keeps tied minima from using
    # up rank slots, and the count follows the palette so the two cannot
    # drift apart.
    bottom_values = s.drop_duplicates().nsmallest(len(colors)).tolist()
    color_for = dict(zip(bottom_values, colors))
    return [
        f'background-color: {color_for[v]}' if v in color_for else ''
        for v in s
    ]
Aggregation functions
For each aggregation function, we have already selected whether it performs better with or without the top-k activation function.
SAE group recommendations table aggregated across all sizes
Group type: Similar
Each value is the mean across all 9 size variants.
# MLflow experiments whose runs feed this table.
experiments = ['523100174176986081', '333391697323445885']
table = generate_recommendations_table(
    experiments,
    prefix_note="aggregations",
    group_type="sim",
)

# (Aggregation, Activation) pairs to keep; every other row is dropped.
row_indexes_selected = [
    ('average', 'True'),
    ('common_features', 'False'),
    ('max', 'True'),
    ('square_average', 'False'),
    ('topk', 'False'),
    ('wcom', 'True'),
]
table = table.loc[
    table.set_index(['Aggregation', 'Activation']).index.isin(row_indexes_selected)
].reset_index()

# (dataset, metric) columns that take part in the aggregation.
selected_columns = [
    (dataset, metric)
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min", "Pop"]
]

# Average every remaining row of each (Aggregation, Activation) pair.
agg_table = table.groupby(["Aggregation", "Activation"])[selected_columns].agg(['mean']).round(3)

# Columns to highlight: the 'mean' sub-column of each metric except "Pop".
selected_columns = [
    (dataset, metric, 'mean')
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min"]
]
# Matching 'std' sub-columns; only referenced by the disabled highlight_min
# call below, and agg(['mean']) above does not produce them.
std_selected_columns = [
    (dataset, metric, 'std')
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min"]
]

# Final expression of the cell: the styled table.
# Disabled follow-up: .highlight_min((std_selected_columns))
agg_table.style.apply(highlight_top3_dark_to_light, subset=selected_columns)
| MovieLens | LastFM1k | ||||||||
|---|---|---|---|---|---|---|---|---|---|
| G/mean | U/mean | U/min | Pop | G/mean | U/mean | U/min | Pop | ||
| mean | mean | mean | mean | mean | mean | mean | mean | ||
| Aggregation | Activation | ||||||||
| average | True | 0.664000 | 0.689000 | 0.578000 | 0.536000 | 0.582000 | 0.816000 | 0.649000 | 0.610000 |
| common_features | False | 0.580000 | 0.656000 | 0.527000 | 0.493000 | 0.585000 | 0.801000 | 0.626000 | 0.622000 |
| max | True | 0.697000 | 0.682000 | 0.564000 | 0.526000 | 0.561000 | 0.813000 | 0.654000 | 0.591000 |
| square_average | False | 0.508000 | 0.663000 | 0.529000 | 0.515000 | 0.601000 | 0.810000 | 0.643000 | 0.628000 |
| topk | False | 0.635000 | 0.687000 | 0.573000 | 0.540000 | 0.604000 | 0.811000 | 0.644000 | 0.607000 |
| wcom | True | 0.652000 | 0.683000 | 0.564000 | 0.530000 | 0.595000 | 0.807000 | 0.630000 | 0.628000 |
SAE group recommendations table aggregated across all sizes
Group type: random
Each value is the mean across all 9 size variants.
# MLflow experiments whose runs feed this table.
experiments = ['523100174176986081', '333391697323445885']
table = generate_recommendations_table(
    experiments,
    prefix_note="aggregations",
    group_type="random",
)

# (Aggregation, Activation) pairs to keep; every other row is dropped.
row_indexes_selected = [
    ('average', 'True'),
    ('common_features', 'False'),
    ('max', 'True'),
    ('square_average', 'False'),
    ('topk', 'False'),
    ('wcom', 'True'),
]
table = table.loc[
    table.set_index(['Aggregation', 'Activation']).index.isin(row_indexes_selected)
].reset_index()

# (dataset, metric) columns that take part in the aggregation.
selected_columns = [
    (dataset, metric)
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min", "Pop"]
]

# Average every remaining row of each (Aggregation, Activation) pair.
agg_table = table.groupby(["Aggregation", "Activation"])[selected_columns].agg(['mean']).round(3)

# Columns to highlight: the 'mean' sub-column of each metric except "Pop".
selected_columns = [
    (dataset, metric, 'mean')
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min"]
]
# Matching 'std' sub-columns; only referenced by the disabled highlight_min
# call below, and agg(['mean']) above does not produce them.
std_selected_columns = [
    (dataset, metric, 'std')
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min"]
]

# Final expression of the cell: the styled table.
# Disabled follow-up: .highlight_min((std_selected_columns))
agg_table.style.apply(highlight_top3_dark_to_light, subset=selected_columns)
| MovieLens | LastFM1k | ||||||||
|---|---|---|---|---|---|---|---|---|---|
| G/mean | U/mean | U/min | Pop | G/mean | U/mean | U/min | Pop | ||
| mean | mean | mean | mean | mean | mean | mean | mean | ||
| Aggregation | Activation | ||||||||
| average | True | 0.585000 | 0.689000 | 0.545000 | 0.572000 | 0.410000 | 0.757000 | 0.570000 | 0.661000 |
| common_features | False | 0.605000 | 0.621000 | 0.478000 | 0.505000 | 0.439000 | 0.740000 | 0.535000 | 0.663000 |
| max | True | 0.588000 | 0.679000 | 0.544000 | 0.563000 | 0.419000 | 0.756000 | 0.569000 | 0.642000 |
| square_average | False | 0.557000 | 0.660000 | 0.506000 | 0.542000 | 0.424000 | 0.756000 | 0.573000 | 0.680000 |
| topk | False | 0.591000 | 0.691000 | 0.546000 | 0.575000 | 0.401000 | 0.756000 | 0.563000 | 0.660000 |
| wcom | True | 0.578000 | 0.681000 | 0.537000 | 0.563000 | 0.372000 | 0.752000 | 0.552000 | 0.673000 |
SAE group recommendations table aggregated across all sizes
Group type: divergent
Each value is the mean across all 9 size variants.
# MLflow experiments whose runs feed this table.
experiments = ['523100174176986081', '333391697323445885']
table = generate_recommendations_table(
    experiments,
    prefix_note="aggregations",
    group_type="div",
)

# (Aggregation, Activation) pairs to keep; every other row is dropped.
row_indexes_selected = [
    ('average', 'True'),
    ('common_features', 'False'),
    ('max', 'True'),
    ('square_average', 'False'),
    ('topk', 'False'),
    ('wcom', 'True'),
]
table = table.loc[
    table.set_index(['Aggregation', 'Activation']).index.isin(row_indexes_selected)
].reset_index()

# (dataset, metric) columns that take part in the aggregation.
selected_columns = [
    (dataset, metric)
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min", "Pop"]
]

# Average every remaining row of each (Aggregation, Activation) pair.
agg_table = table.groupby(["Aggregation", "Activation"])[selected_columns].agg(['mean']).round(3)

# Columns to highlight: the 'mean' sub-column of each metric except "Pop".
selected_columns = [
    (dataset, metric, 'mean')
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min"]
]
# Matching 'std' sub-columns; only referenced by the disabled highlight_min
# call below, and agg(['mean']) above does not produce them.
std_selected_columns = [
    (dataset, metric, 'std')
    for dataset in ["MovieLens", "LastFM1k"]
    for metric in ["G/mean", "U/mean", "U/min"]
]

# Final expression of the cell: the styled table.
# Disabled follow-up: .highlight_min((std_selected_columns))
agg_table.style.apply(highlight_top3_dark_to_light, subset=selected_columns)
| MovieLens | LastFM1k | ||||||||
|---|---|---|---|---|---|---|---|---|---|
| G/mean | U/mean | U/min | Pop | G/mean | U/mean | U/min | Pop | ||
| mean | mean | mean | mean | mean | mean | mean | mean | ||
| Aggregation | Activation | ||||||||
| average | True | 0.126000 | 0.625000 | 0.443000 | 0.425000 | 0.462000 | 0.669000 | 0.454000 | 0.613000 |
| common_features | False | 0.128000 | 0.445000 | 0.217000 | 0.262000 | 0.422000 | 0.646000 | 0.392000 | 0.606000 |
| max | True | 0.113000 | 0.608000 | 0.432000 | 0.413000 | 0.455000 | 0.667000 | 0.456000 | 0.596000 |
| square_average | False | 0.134000 | 0.581000 | 0.381000 | 0.397000 | 0.534000 | 0.684000 | 0.460000 | 0.644000 |
| topk | False | 0.130000 | 0.638000 | 0.473000 | 0.478000 | 0.465000 | 0.661000 | 0.423000 | 0.596000 |
| wcom | True | 0.191000 | 0.616000 | 0.434000 | 0.428000 | 0.507000 | 0.674000 | 0.449000 | 0.626000 |